Render this report with ~/spinal_cord_paper/scripts/Gg_devel_scWGCNA_module_analysis_render.sh.
library(Seurat)
Attaching SeuratObject
library(WGCNA)
Loading required package: dynamicTreeCut
Loading required package: fastcluster
Attaching package: ‘fastcluster’
The following object is masked from ‘package:stats’:
hclust
Attaching package: ‘WGCNA’
The following object is masked from ‘package:stats’:
cor
library(tidyr)
library(ggplot2)
library(stringr)
library(patchwork)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ readr 1.4.0 ✔ forcats 0.5.1
✔ purrr 0.3.4
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(cowplot)
Attaching package: ‘cowplot’
The following object is masked from ‘package:patchwork’:
align_plots
library(pheatmap)
source("~/Neuraltube/scripts/heatmap4.R")
The individual data sets are the Day 5 (Gg_D05_ctrl_seurat_070323), Day 7 (Gg_D07_ctrl_seurat_070323), and Day 10 (Gg_ctrl_1_seurat_070323) chicken spinal cord sets. The test WGCNA data are the modules calculated on the integrated data set of all three stages.
se_path <- c("Gg_D05_ctrl_seurat_070323",
"Gg_D07_ctrl_seurat_070323",
"Gg_ctrl_1_seurat_070323")
clust_col <- read.csv("~/spinal_cord_paper/annotations/broad_cluster_marker_colors.csv") %>%
rename(broad = broad_cluster) %>%
select(-marker)
Order of the broad clusters for plotting purposes.
broad_order <- c("progenitors",
"FP",
"RP",
"FP/RP",
"neurons",
"OPC",
"MFOL",
"pericytes",
"microglia",
"blood",
"vasculature"
)
my.ses <- list()
col_table <- list()
ord_levels <- list()
for (i in seq(se_path)) {
# load the data sets
my.se <- readRDS(paste0("~/spinal_cord_paper/data/", se_path[i], ".rds"))
annot <- read.csv(list.files("~/spinal_cord_paper/annotations",
pattern = str_remove(se_path[i], "_seurat_\\d{6}"),
full.names = TRUE))
if(length(table(annot$number)) != length(table(my.se$seurat_clusters))) {
stop("Number of clusters must be identical!")
}
# rename for left join
annot <- annot %>%
mutate(fine = paste(fine, number, sep = "_")) %>%
mutate(number = factor(number, levels = 1:nrow(annot))) %>%
rename(seurat_clusters = number)
# cluster order for vln plots
ord_levels[[i]] <- annot$fine[order(match(annot$broad, broad_order))]
# create index for color coding
col_table[[i]] <- annot %>%
left_join(clust_col, by = "broad") %>%
select(c("fine", "color"))
# add cluster annotation to meta data
my.se@meta.data <- my.se@meta.data %>%
rownames_to_column("rowname") %>%
left_join(annot, by = "seurat_clusters") %>%
mutate(fine = factor(fine, levels = annot$fine)) %>%
column_to_rownames("rowname")
my.ses[[i]] <- my.se
}
names(my.ses) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
names(col_table) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
names(ord_levels) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
rm(my.se, annot)
# The reference WGCNA data. We can have several, if we want to test many at the same time
WGCNA_data = list()
WGCNA_data[[1]] = readRDS("~/spinal_cord_paper/output/Gg_devel_int_scWGCNA_250723.rds")
my.wsub =list()
my.wsub[[1]]= c(1:22)
# the name of each sample, as they appear in my.files and in the metadata of the combined object
my.samplenames = c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
# The subset of clusters in each of the corresponding samples
my.subset=list(c(1:24),
c(1:26),
c(1:22))
# This is just to add a little bit more sense to the modules, so that we don't get just a color. Corresponds to WGCNA_data
my.modulenames = list()
my.modulenames[[1]] = c(1:22)
#Subset the seurat objects if needed as defined above
for (i in 1:length(my.ses)) {
my.ses[[i]] = subset(my.ses[[i]], idents = my.subset[[i]])
}
# Take only genes that are present in the all samples
my.dcs = list()
# We go trhough each WGCNA data
for(i in 1:length(WGCNA_data)) {
#We start with complete modules
my.dc = WGCNA_data[[i]]$dynamicCols
#Remove the few genes that are not found in the other datasets
my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[1]]@assays$RNA@data))]
my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[2]]@assays$RNA@data))]
my.dc = my.dc[which(names(my.dc) %in% rownames(my.ses[[3]]@assays$RNA@data))]
my.dc = my.dc[which(my.dc %in% names(table(WGCNA_data[[i]]$dynamicCols))[my.wsub[[i]]])]
my.dcs[[i]] = my.dc
}
Here, we do a correlation matrix / heatmap, to see which cell
clusters group togheter. This helps us to make more detailed
dotplots.
This part of the script can still be used to compare several WGCNA
datasets in parallel.
# broad cluster color table
all_col <- do.call(rbind, col_table) %>%
rownames_to_column("sample") %>%
mutate(sample = substr(sample, 1, 11)) %>%
mutate(sample_celltype = paste(sample, fine, sep = "_")) %>%
select(c("color", "sample_celltype", "sample"))
# take the normalized data, of each single-cell object.
my.datExpr = list()
# Go trough each seurat object
for (i in 1:length(my.ses)) {
my.datExpr[[i]] = data.frame(my.ses[[i]]@assays$RNA@data)
}
# Calculate, for every cell, in every sample, the average expression of each of the modules.
my.MEs = list()
# For each set of WGCNA modules
for (i in 1:length(WGCNA_data)) {
# Make a new list
my.MEs[[i]] = list()
# Populate with data frames from each seurat object
for (j in 1:length(my.ses)) {
# Data frame of average module expression, per cell.
my.MEs[[i]][[j]] = moduleEigengenes(t(my.datExpr[[j]][names(my.dcs[[i]]),]), colors = my.dcs[[i]])
}
}
#Calculate average of expression, per sample and cell cluster
my.modavg = list()
for (i in 1:length(WGCNA_data)) {
my.modavg[[i]] = list()
for (j in 1:length(my.ses)) {
#Make the mean per cell clusters
my.modavg[[i]][[j]] = aggregate(
my.MEs[[i]][[j]]$averageExpr,
list(my.ses[[j]]@meta.data$seurat_clusters),
mean
)
# Give the cell clusters meaningful names
rownames(my.modavg[[i]][[j]]) = paste0(
my.samplenames[j],
"_",
levels(my.ses[[j]]@meta.data$fine)[my.subset[[j]]]
)
}
}
#Gather data to plot
my.wgmat = list()
for (i in 1:length(WGCNA_data)) {
#bind the expression data into one dataframe
my.wgmat[[i]] = data.table::rbindlist(my.modavg[[i]])
#Get rid of the extra column
my.wgmat[[i]] = data.frame(my.wgmat[[i]][,-1])
#Restore the rownames
rownames(my.wgmat[[i]]) = unlist(sapply(my.modavg[[i]], rownames))
}
#Get a dataframe with annotations for all the samples and colors we need.
my.metam <- list()
for (i in seq(my.ses)) {
my.metam[[i]] <- my.ses[[i]][[]]
rownames(my.metam[[i]]) <- paste0(rownames(my.metam[[i]]), "_", i)
}
my.metam <- do.call(rbind, my.metam)
my.metam$orig.ident <- str_replace_all(my.metam$orig.ident, pattern = "Gg_ctrl_1", "Gg_D10_ctrl")
# my.metam$sample_celltype = paste0(substr(my.metam$orig.ident,7,9),"_",my.metam$seurat_clusters)
my.metam$sample_celltype = paste0(my.metam$orig.ident, "_", my.metam$fine)
my.metam <- my.metam %>%
tibble::rownames_to_column(var = "cell_ID") %>%
dplyr::left_join(all_col, by = "sample_celltype") %>%
tibble::column_to_rownames(var = "cell_ID")
# get sample colors
my.colsm = c("grey", "grey30", "black")
names(my.colsm) <- c("Gg_D05_ctrl", "Gg_D07_ctrl", "Gg_D10_ctrl")
#The colors for the samples and clusters. First the closest to the heatmap. Add a white space to easy the eye and make less confussing
my.heatcols =list()
for (i in 1:length(my.wgmat)) {
my.heatcols[[i]] = as.matrix(data.frame(
cluster= as.character(all_col$color[match(rownames(my.wgmat[[i]]), all_col$sample_celltype)]),
"." = "white",
sample= as.character(my.colsm[match(all_col$sample, names(my.colsm))]))
)
}
rm(my.datExpr, my.MEs, my.dcs)
# names and colors for the heatmap annotation
annot_name <- data.frame(
"Celltypes" = all_col$sample_celltype,
"Sample" = all_col$sample,
row.names = all_col$sample_celltype)
pheat_col_table <- do.call(rbind, col_table) %>%
rownames_to_column("sample") %>%
mutate(sample = substr(sample, 1,11)) %>%
mutate(fine = paste(sample, fine, sep = "_"))
# match color table with annotation
pheat_col_table <- pheat_col_table[match(annot_name$Celltypes, pheat_col_table$fine),]
annot_col <- list(
Celltypes = pheat_col_table$color,
Sample = c(Gg_D05_ctrl = "#A4A4A4",
Gg_D07_ctrl = "#515151",
Gg_D10_ctrl = "#000000")
)
names(annot_col[[1]]) <- annot_name$Celltypes
The integrated data set on which the WGCNA is calculated. We plot it, split by the original cell types from the three original samples.
my.sec = readRDS("~/spinal_cord_paper/data/Gg_devel_int_seurat_250723.rds")
identical(rownames(my.metam), colnames(my.sec))
[1] TRUE
#Which WGCNA dataset are we using?
my.W = 1
#Choose how many groups of cell clusters we want to use. Based on the distance clustering from above
my.clcl = cutree(hclust(as.dist(1-cor(t(my.wgmat[[my.W]])))), k = 5)
my.metam$sample_celltype <- factor(my.metam$sample_celltype, levels = names(my.clcl))
#Set the identities of the integrated data, to the annotated clusters
my.sec = SetIdent(my.sec, value = my.metam$sample_celltype)
p1 <- DimPlot(
my.sec,
reduction = "tsne",
label = TRUE,
repel = TRUE,
cols = my.heatcols[[1]][,"cluster"],
split.by = "orig.ident"
) +
NoLegend()
p1
tSNE DimPlots showing the average expression of each module by stage.
for (i in seq(my.ses)) {
# prepare average expression table
tmp <- avg.mod.eigengenes %>%
tibble::rownames_to_column("cell_ID") %>%
dplyr::filter(grepl(paste0("_", i, "$"), cell_ID)) %>%
dplyr::mutate(cell_ID = stringr::str_remove_all(cell_ID, paste0("_", i))) %>%
tibble::column_to_rownames("cell_ID")
identical(rownames(tmp), colnames(my.ses[[i]]))
# add meta data to the seurat objects
my.ses[[i]] <- AddMetaData(my.ses[[i]], tmp)
}
#max and min expression per module (column max)
mod_max <- apply(avg.mod.eigengenes, MARGIN = 2, FUN = max)[module_order]
mod_min <- apply(avg.mod.eigengenes, MARGIN = 2, FUN = min)[module_order]
modplots <- list()
modplots[[1]] <- list()
modplots[[2]] <- list()
modplots[[3]] <- list()
modules_in_order <- colnames(tmp)[module_order]
# plot the modules split to the stages
for (i in seq(my.ses)) {
for (j in seq(ncol(tmp))) {
modplots[[i]][[j]] <- FeaturePlot(
my.ses[[i]],
features = modules_in_order[j],
reduction = "tsne",
cols = c("grey90", substring(modules_in_order[j], 3))
) +
ggtitle(stringr::str_remove(modules_in_order[j],"^AE")) +
scale_color_gradient(low="ivory2", high=substring(modules_in_order[j], 3), #colors in the scale
# breaks=seq(mod_min[j], mod_max[j], 0.1), #breaks in the scale bar
limits=c(mod_min[j], mod_max[j])) #same limits for plots
}
}
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
tmp <- c(modplots[[1]], modplots[[2]], modplots[[3]])
gridExtra::grid.arrange(grobs = tmp, ncol = 3, as.table = FALSE)
We plot the average expression of each module in the three stages and the 5 broad cell type clusters present in all 3 stages.
for (i in seq(my.ses)) {
my.ses[[i]]$seurat_clusters <- factor(
my.ses[[i]]$seurat_clusters,
levels = levels(my.ses[[i]]$seurat_clusters)[as.integer(str_extract(ord_levels[[i]], "\\d{1,2}$"))]
)
}
vplots <- list()
for (i in seq(my.ses)) {
mods <- colnames(my.ses[[i]][[]])[grep("^AE",colnames(my.ses[[i]][[]]))]
vplots[[i]] <- VlnPlot(
my.ses[[i]],
features = mods[module_order],
group.by = "seurat_clusters",
stack = TRUE, flip = TRUE,
cols = substring(mods, 3)[module_order]) +
theme(legend.position = "none") +
geom_hline(yintercept = 0, lty = "dashed")
}
vplots[[1]]
vplots[[2]]
vplots[[3]]
col_table[[i]]$color[as.integer(str_extract(ord_levels[[i]], "\\d{1,2}$"))]
[1] "#edc919" "#edc919" "#edc919" "#edc919" "#edc919" "#edc919" "#853335" "#99ca3c" "#cd2b91" "#cd2b91" "#cd2b91" "#cd2b91" "#008cb5" "#008cb5" "#008cb5" "#008cb5"
[17] "#008cb5" "#008cb5" "#333399" "#333399" "#f26a2d" "#bebebe"
For the figures we specifically select the two MN modules and plot them as Vln plots.
v1 <- VlnPlot(my.sec,
features = mods[module_order],
group.by = "orig.ident")
v1
pdf("~/spinal_cord_paper/figures/Fig_1_AE_by_stage.pdf", height = 20, width = 20)
v1
# Date and time of Rendering
Sys.time()
sessionInfo()